• Data on 1000 trees from 10 sites. • Trees per site: 4 - 392.
library(curl)
## Using libcurl 8.4.0 with LibreSSL/3.3.6
trees <- read.csv(curl("https://raw.githubusercontent.com/orrortega/statistics-natural-resources/main/Day_II/Data/trees.csv"))
head(trees)
## site dbh height sex dead
## 1 4 29.68 36.1 male 0
## 2 5 33.29 42.3 male 0
## 3 2 28.03 41.9 female 0
## 4 5 39.86 46.5 female 0
## 5 1 47.94 43.9 female 0
## 6 1 10.82 26.2 male 0
trees$site <- as.factor(trees$site)
lm.simple <- lm(height ~ dbh, data = trees)
summary(lm.simple)
##
## Call:
## lm(formula = height ~ dbh, data = trees)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.3270 -2.8978 0.1057 2.7924 12.9511
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.33920 0.31064 62.26 <2e-16 ***
## dbh 0.61570 0.01013 60.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.093 on 998 degrees of freedom
## Multiple R-squared: 0.7874, Adjusted R-squared: 0.7871
## F-statistic: 3695 on 1 and 998 DF, p-value: < 2.2e-16
library(ggplot2)
ggplot(trees) +
aes(dbh, height) +
geom_point() +
geom_smooth(method = "lm", size = 3) +
labs(x = "DBH (cm)", y = "Height (m)", title = "Single intercept") + theme_minimal(base_size = 16)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
ggplot(subset(trees, site == 1 | site == 2)) +
aes(dbh, height, colour = site) +
geom_point() +
geom_smooth(method = "lm", size = 3) +
labs(x = "DBH (cm)", y = "Height (m)",
title = "Different intercept for each site") +
theme_minimal(base_size = 16) +
theme(legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'
lm.interc <- lm(height ~ factor(site) + dbh, data = trees)
summary(lm.interc)
##
## Call:
## lm(formula = height ~ factor(site) + dbh, data = trees)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1130 -1.9885 0.0582 2.0314 11.3320
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.699037 0.260565 64.088 < 2e-16 ***
## factor(site)2 6.504303 0.256730 25.335 < 2e-16 ***
## factor(site)3 4.357457 0.354181 12.303 < 2e-16 ***
## factor(site)4 1.934650 0.356102 5.433 6.98e-08 ***
## factor(site)5 3.637432 0.339688 10.708 < 2e-16 ***
## factor(site)6 4.204511 0.421906 9.966 < 2e-16 ***
## factor(site)7 -0.176193 0.666772 -0.264 0.7916
## factor(site)8 -5.312648 0.893603 -5.945 3.82e-09 ***
## factor(site)9 5.437049 1.087766 4.998 6.84e-07 ***
## factor(site)10 2.263338 1.369986 1.652 0.0988 .
## dbh 0.617075 0.007574 81.473 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.043 on 989 degrees of freedom
## Multiple R-squared: 0.8835, Adjusted R-squared: 0.8823
## F-statistic: 750 on 10 and 989 DF, p-value: < 2.2e-16
ggplot(trees) +
aes(dbh, height) +
geom_point() +
geom_smooth(method = "lm", size = 3) +
labs(x = "DBH (cm)", y = "Height (m)", title = "Single intercept") + theme_minimal(base_size = 16)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(trees) +
aes(dbh, height, colour = site) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", size = 1.5, se = FALSE) +
labs(x = "DBH (cm)", y = "Height (m)", title = "Different intercept for each site") +
theme_minimal(base_size = 16) + theme(legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'
• Varying intercepts
• Varying slopes
Mixed models estimate varying parameters (intercepts and/or slopes) with pooling among levels (rather than considering them fully independent)
Hence there’s gradient between
• complete pooling: Single overall intercept.
– lm (height ~ dbh)
• no pooling: One independent intercept for each site.
– lm (height ~ dbh + site)
• partial pooling: Inter-related intercepts.
– lmer(height ~ dbh + (1 | site))
Random vs Fixed effects?
Fixed effects constant across individuals, random effects vary.
Effects are fixed if they are interesting in themselves; random if interest in the underlying population.
Fixed when sample exhausts the population; random when the sample is small part of the population.
Random effect if it’s assumed to be a realized value of random variable.
Fixed effects estimated using least squares or maximum likelihood; random effects estimated with shrinkage.
What is a random effect, really?
• Varies by group
• Variation estimated with probability model
Random effects are estimated with partial pooling, while fixed effects are not (infinite variance).
Shrinkage improves parameter estimation
Especially for groups with low sample size
library(lme4)
## Loading required package: Matrix
mixed <- lmer(height ~ dbh + (1|site), data = trees)
summary(mixed)
## Linear mixed model fit by REML ['lmerMod']
## Formula: height ~ dbh + (1 | site)
## Data: trees
##
## REML criterion at convergence: 5108.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3199 -0.6607 0.0227 0.6716 3.7328
##
## Random effects:
## Groups Name Variance Std.Dev.
## site (Intercept) 11.195 3.346
## Residual 9.261 3.043
## Number of obs: 1000, groups: site, 10
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 19.011468 1.100444 17.28
## dbh 0.616927 0.007572 81.47
##
## Correlation of Fixed Effects:
## (Intr)
## dbh -0.197
coef(mixed)
## $site
## (Intercept) dbh
## 1 16.70800 0.6169271
## 2 23.19162 0.6169271
## 3 21.04229 0.6169271
## 4 18.64086 0.6169271
## 5 20.32995 0.6169271
## 6 20.88200 0.6169271
## 7 16.61686 0.6169271
## 8 11.88302 0.6169271
## 9 21.84779 0.6169271
## 10 18.97228 0.6169271
##
## attr(,"class")
## [1] "coef.mer"
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
allEffects(mixed)
## model: height ~ dbh
##
## dbh effect
## dbh
## 5 20 30 40 50
## 22.09610 31.35001 37.51928 43.68855 49.85782
plot(allEffects(mixed))
library(visreg)
visreg(mixed, xvar = "dbh", by = "site", overlay = FALSE)
library(ggplot2)
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
theme_set(theme_minimal(base_size = 16))
#sjp.lmer(mixed, type = "ri.slope")
#plot_model(mixed, type = "eff")
sjPlot::plot_model(mixed, type = "re")
## Warning in checkDepPackageVersion(dep_pkg = "TMB"): Package version inconsistency detected.
## glmmTMB was built with TMB version 1.9.6
## Current TMB version is 1.9.10
## Please re-install glmmTMB from source or restore original 'TMB' package (see '?reinstalling' for more information)
plot(mixed)
Checking residuals
ggResidpanel::resid_panel(mixed)
DHARMa::simulateResiduals(mixed, plot = TRUE, use.u = TRUE)
## Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
##
## Scaled residual values: 0.444 0.68 0.724 0.712 0.216 0.824 0.488 0.508 0.616 0.736 0.704 0.992 0.672 0.708 0.624 0.784 0.98 0.904 0.328 0.668 ...